# -*- coding: utf-8 -*-
"""
For preprocessing the ipums data (census sample and ACS data)

"""

import sys
import argparse
import string
import codecs
from os import path
import random
from collections import defaultdict
import numpy as np
import operator
import pandas as pd
import glob

# Fix the seed of the stdlib random module at import time so that any use of
# `random` made after this point gives the same sequence on every run.
random.seed(1776)


def new_county_info():
    """
    Build a fresh, zeroed accumulator for one geographic unit.
    :return: a new dict holding the running totals (totalT, totalNotT, N, totalW1,
             totalW2 start at 0.0; totalX and totalNotX start at 0)
    """
    float_totals = ("totalT", "totalNotT", "N", "totalW1", "totalW2")
    int_totals = ("totalX", "totalNotX")
    county_info = dict((name, 0.0) for name in float_totals)
    county_info.update((name, 0) for name in int_totals)
    return county_info

def to_float_or_default_field_value(field_value):
    """
    Convert a field to float, mapping the marker "Suppressed" to 0.0.
    :return: 0.0 for "Suppressed", else float(field_value)
    """
    return 0.0 if field_value == "Suppressed" else float(field_value)

def tof(value):
    """
    Shorthand for float(value).
    :return: value converted to float
    """
    as_float = float(value)
    return as_float

def dd(n, d):
    """
    default division: n / d without converting the arguments
    :return: division, else 0.0 when the denominator is falsy (0, 0.0, None, ...)
    """
    if not d:
        return 0.0
    return n / d


def ddf(n, d):
    """
    default division, converting both arguments to float

    The zero check is made on the converted denominator as well as on the raw
    value, so a denominator such as the string "0" gives 0.0 instead of raising
    ZeroDivisionError.
    :param n: numerator, anything float() accepts
    :param d: denominator, anything float() accepts, or a falsy value
    :return: division, else 0.0
    """
    if not d:
        # falsy denominator (0, 0.0, None, ""): nothing is converted at all
        return 0.0
    denominator = float(d)
    if denominator == 0.0:
        # truthy before conversion but numerically zero, e.g. "0" or "0.0"
        return 0.0
    return float(n) / denominator


def save_lines(filename_with_path, list_of_lists):
    """
    Write the given lines to a utf-8 file, replacing any existing content.
    :param filename_with_path: path of the file to (over)write
    :param list_of_lists: iterable of text lines; they are written as-is, so each
                          one must already carry its own newline
    """
    with codecs.open(filename_with_path, "w", encoding="utf-8") as handle:
        handle.write("".join(list_of_lists))


def get_int_rowval(row, index_into_row):
    """
    Best-effort parse of one field of a split csv row as an int.
    :param row: indexable collection of raw field values
    :param index_into_row: position of the wanted field within row
    :return: the field as int, or None when the field is missing or is not a
             valid int literal
    """
    try:
        return int(row[index_into_row])
    except (ValueError, TypeError, LookupError, OverflowError):
        # Only parse/lookup failures mean "no value"; KeyboardInterrupt,
        # SystemExit and the like must not be swallowed here.
        return None


def get_float_rowval(row, index_into_row):
    """
    Best-effort parse of one field of a split csv row as a float.
    :param row: indexable collection of raw field values
    :param index_into_row: position of the wanted field within row
    :return: the field as float, or None when the field is missing or is not a
             valid float literal
    """
    try:
        return float(row[index_into_row])
    except (ValueError, TypeError, LookupError, OverflowError):
        # Only parse/lookup failures mean "no value"; KeyboardInterrupt,
        # SystemExit and the like must not be swallowed here.
        return None



def save_spreadsheet(counties, county_dict, output_dir, label):
    """
    Write one csv with a row of proportions for every geographic unit.

    Columns: X, notX, T and notT are the unit's totals divided by its N; W1 is
    totalW1 / totalX; W2 is totalW2 / totalNotX; N is the unit's total itself.
    Divisions with a zero denominator give 0.0 (see ddf).
    :param counties: iterable of unit ids; fixes which rows are written and their order
    :param county_dict: unit id -> accumulator dict as built by new_county_info()
    :param output_dir: directory the file is written to
    :param label: file name of the csv inside output_dir
    """
    column_names = ["X", "notX", "T", "notT", "W1", "W2", "N"]
    csv_rows = [",".join(column_names) + "\n"]
    total_n = 0.0
    for county in counties:
        info = county_dict[county]
        n = info["N"]
        x_total = info["totalX"]
        not_x_total = info["totalNotX"]
        # X and notX together must account for the whole unit; allow a tolerance of 1
        assert abs((x_total + not_x_total) - n) <= 1.0
        # same order as column_names
        values = (
            ddf(x_total, n),
            ddf(not_x_total, n),
            ddf(info["totalT"], n),
            ddf(info["totalNotT"], n),
            ddf(info["totalW1"], x_total),
            ddf(info["totalW2"], not_x_total),
            n,
        )
        csv_rows.append(",".join(["%f" % value for value in values]) + "\n")
        total_n += n

    save_lines(path.join(output_dir, label), csv_rows)
    # single parenthesised expression: prints the same text under the Python 2
    # print statement and also parses as a Python 3 call
    print("%s %s" % ("Total sum of N across counties:", total_n))


def process_and_save_data(filepath_with_name, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, use_mcd=False):
    """
    Stream a csv extract and write one spreadsheet of PERWT-weighted totals per year.

    The years in year_labels are worked through one at a time, in the given
    order. Rows of the current year are accumulated per geographic unit; the
    spreadsheet for that year is written as soon as a row with a different YEAR
    follows, or when the end of the file is reached while the year is still
    being accumulated (so the last year no longer needs a trailing row of
    another year to be written). Processing stops after the last label.

    A row is counted only if its x value is in XVARVAL_SET and its t value is in
    TVARVAL_SET. It counts as "X" when x == XVARVAL_1 and as "T" when
    t == TVARVAL_1; W1 collects T rows that are X, W2 collects T rows that are not X.

    :param filepath_with_name: input csv; first line is the header, fields are comma separated
    :param output_dir: directory the per-year csv files are written to
    :param year_labels: years to process, as strings parsable by int()
    :param GEN_LABEL: output file name template with a {year_label} placeholder
    :param XVARVAL_1: x value that counts as "X"
    :param XVARVAL_SET: x values a row must have to be counted at all
    :param TVARVAL_1: t value that counts as "T"
    :param XVARVAL_LABEL: header name of the x column
    :param TVARVAL_LABEL: header name of the t column
    :param TVARVAL_SET: t values a row must have to be counted at all
    :param use_mcd: if True the geographic unit is state-county-MCD, else state-county
    :return: None; output goes through save_spreadsheet and progress is printed
    """
    current_dataset_index = 0
    year_label = year_labels[current_dataset_index]
    # parsed once per year instead of once per row
    target_year = int(year_label)
    label = GEN_LABEL.format(year_label=year_label)

    ctr = 0

    seen_year = False
    # 'county' here refers to the geographic unit
    counties = defaultdict(int)  # unit id -> number of counted rows
    county_dict = defaultdict(int)  # index: county id; value: new_county_info()

    # number of counted rows per distinct HHWT / PERWT value (printed as diagnostics)
    hhwt_dict = defaultdict(int)
    perwt_dict = defaultdict(int)

    households = defaultdict(int)
    persons = defaultdict(int)

    def finish_year():
        # Write the spreadsheet of the year being accumulated and print its
        # diagnostics. Reads the enclosing function's variables as they are at
        # call time. Each print is a single parenthesised expression so the
        # text matches the Python 2 print statement and also parses on Python 3.
        save_spreadsheet(counties, county_dict, output_dir, label)
        print("%s %s" % ("Finished processing year", year_label))
        print("%s %s" % ("Final ctr", ctr))
        print("%s %s" % ("Number of counties", len(counties)))

        print("%s %s" % ("hhwt_dict:", hhwt_dict))
        print("%s %s" % ("perwt_dict:", perwt_dict))

        print("%s %s" % ("Number of unique households:", len(households)))
        print("%s %s" % ("Number of unique people:", len(persons)))

    with codecs.open(filepath_with_name, encoding="utf-8") as f:
        for line in f:
            if ctr % 5000000 == 0:
                print("Line %d" % ctr)
            row = line.strip().split(",")
            if ctr == 0:  # header
                header = [x.strip('"') for x in row]
                print(header)
                year_index = header.index("YEAR")

                state_index = header.index("STATEICP")
                county_index = header.index("COUNTY")
                if use_mcd:
                    mcd_index = header.index("MCD")

                xvar_index = header.index(XVARVAL_LABEL)
                tvar_index = header.index(TVARVAL_LABEL)

                hhwt_index = header.index("HHWT")
                perwt_index = header.index("PERWT")

                # check that each person is included only once
                # A combination of YEAR, DATANUM, and SERIAL provides a unique identifier for every household in the IPUMS; the combination of YEAR, DATANUM, SERIAL, and PERNUM uniquely identifies every person in the database.
                datanum_index = header.index("DATANUM")
                serial_index = header.index("SERIAL")  # household
                pernum_index = header.index("PERNUM")  # person

            else:  # body
                year = get_int_rowval(row, year_index)

                if year != target_year and seen_year:
                    # first row after the current year: write it out and move on
                    finish_year()

                    hhwt_dict = defaultdict(int)
                    perwt_dict = defaultdict(int)

                    counties = defaultdict(int)
                    county_dict = defaultdict(int)

                    households = defaultdict(int)
                    persons = defaultdict(int)

                    current_dataset_index += 1
                    if current_dataset_index > len(year_labels) - 1:
                        break
                    year_label = year_labels[current_dataset_index]
                    target_year = int(year_label)
                    label = GEN_LABEL.format(year_label=year_label)
                    seen_year = False

                if year == target_year:
                    seen_year = True

                    # the remaining fields are only parsed for rows that can be counted
                    xvar = get_int_rowval(row, xvar_index)
                    tvar = get_int_rowval(row, tvar_index)

                    if xvar in XVARVAL_SET and tvar in TVARVAL_SET:
                        hhwt = get_float_rowval(row, hhwt_index)
                        perwt = get_float_rowval(row, perwt_index)
                        hhwt_dict[hhwt] += 1
                        perwt_dict[perwt] += 1

                        state = get_int_rowval(row, state_index)
                        county_id = get_int_rowval(row, county_index)
                        if use_mcd:
                            mcd = get_int_rowval(row, mcd_index)
                            county = "%d-%d-%d" % (state, county_id, mcd)
                        else:
                            # a county is uniquely identified by the county id and the state id
                            county = "%d-%d" % (state, county_id)
                        counties[county] += 1

                        datanum = get_int_rowval(row, datanum_index)
                        serial = get_int_rowval(row, serial_index)
                        pernum = get_int_rowval(row, pernum_index)

                        household = "%d-%d-%d" % (year, datanum, serial)
                        households[household] += 1

                        person = "%d-%d-%d-%d" % (year, datanum, serial, pernum)
                        persons[person] += 1
                        if persons[person] > 1:
                            # row holds the same split fields the warning has always shown
                            print("%s %s" % ("WARNING: person %s occurs with frequency %d in line:" % (person, persons[person]), row))

                        # add data:
                        if county not in county_dict:
                            county_info = new_county_info()
                        else:
                            county_info = county_dict[county]

                        is_x = xvar == XVARVAL_1
                        is_t = tvar == TVARVAL_1

                        county_info["totalX" if is_x else "totalNotX"] += (perwt * 1.0)
                        county_info["totalT" if is_t else "totalNotT"] += (perwt * 1.0)
                        if is_t:
                            county_info["totalW1" if is_x else "totalW2"] += (perwt * 1.0)

                        county_info["N"] += (perwt * 1.0)

                        county_dict[county] = county_info

            ctr += 1

    # End of file reached while a year was still being accumulated: write it too.
    # After the break above the index is past the last label, so a year that was
    # already written is never written twice.
    if seen_year and current_dataset_index < len(year_labels):
        finish_year()

def process_and_save_data_ranges(filepath_with_name, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_MIN, TVARVAL_MAX, use_mcd=False):
    """
    Stream a csv extract and write one spreadsheet of PERWT-weighted totals per
    year, with the t variable treated as a numeric range split by a threshold.

    The years in year_labels are worked through one at a time, in the given
    order. Rows of the current year are accumulated per geographic unit; the
    spreadsheet for that year is written as soon as a row with a different YEAR
    follows, or when the end of the file is reached while the year is still
    being accumulated (so the last year no longer needs a trailing row of
    another year to be written). Processing stops after the last label.

    A row is counted only if its x value is in XVARVAL_SET and its t value lies
    in [TVARVAL_MIN, TVARVAL_MAX]; rows whose t value cannot be parsed are
    skipped. A counted row is "X" when x == XVARVAL_1 and "T" when
    t < TVARVAL_1 (strictly below the threshold); W1 collects T rows that are X,
    W2 collects T rows that are not X.

    :param filepath_with_name: input csv; first line is the header, fields are comma separated
    :param output_dir: directory the per-year csv files are written to
    :param year_labels: years to process, as strings parsable by int()
    :param GEN_LABEL: output file name template with a {year_label} placeholder
    :param XVARVAL_1: x value that counts as "X"
    :param XVARVAL_SET: x values a row must have to be counted at all
    :param TVARVAL_1: threshold; t values strictly below it count as "T"
    :param XVARVAL_LABEL: header name of the x column
    :param TVARVAL_LABEL: header name of the t column
    :param TVARVAL_MIN: smallest t value (inclusive) a counted row may have
    :param TVARVAL_MAX: largest t value (inclusive) a counted row may have
    :param use_mcd: if True the geographic unit is state-county-MCD, else state-county
    :return: None; output goes through save_spreadsheet and progress is printed
    """
    current_dataset_index = 0
    year_label = year_labels[current_dataset_index]
    # parsed once per year instead of once per row
    target_year = int(year_label)
    label = GEN_LABEL.format(year_label=year_label)

    ctr = 0

    seen_year = False
    # 'county' here refers to the geographic unit
    counties = defaultdict(int)  # unit id -> number of counted rows
    county_dict = defaultdict(int)  # index: county id; value: new_county_info()

    # number of counted rows per distinct HHWT / PERWT value (printed as diagnostics)
    hhwt_dict = defaultdict(int)
    perwt_dict = defaultdict(int)

    households = defaultdict(int)
    persons = defaultdict(int)

    def finish_year():
        # Write the spreadsheet of the year being accumulated and print its
        # diagnostics. Reads the enclosing function's variables as they are at
        # call time. Each print is a single parenthesised expression so the
        # text matches the Python 2 print statement and also parses on Python 3.
        save_spreadsheet(counties, county_dict, output_dir, label)
        print("%s %s" % ("Finished processing year", year_label))
        print("%s %s" % ("Final ctr", ctr))
        print("%s %s" % ("Number of counties", len(counties)))

        print("%s %s" % ("hhwt_dict:", hhwt_dict))
        print("%s %s" % ("perwt_dict:", perwt_dict))

        print("%s %s" % ("Number of unique households:", len(households)))
        print("%s %s" % ("Number of unique people:", len(persons)))

    with codecs.open(filepath_with_name, encoding="utf-8") as f:
        for line in f:
            if ctr % 5000000 == 0:
                print("Line %d" % ctr)
            row = line.strip().split(",")
            if ctr == 0:  # header
                header = [x.strip('"') for x in row]
                print(header)
                year_index = header.index("YEAR")

                state_index = header.index("STATEICP")
                county_index = header.index("COUNTY")
                if use_mcd:
                    mcd_index = header.index("MCD")

                xvar_index = header.index(XVARVAL_LABEL)
                tvar_index = header.index(TVARVAL_LABEL)

                hhwt_index = header.index("HHWT")
                perwt_index = header.index("PERWT")

                # check that each person is included only once
                # A combination of YEAR, DATANUM, and SERIAL provides a unique identifier for every household in the IPUMS; the combination of YEAR, DATANUM, SERIAL, and PERNUM uniquely identifies every person in the database.
                datanum_index = header.index("DATANUM")
                serial_index = header.index("SERIAL")  # household
                pernum_index = header.index("PERNUM")  # person

            else:  # body
                year = get_int_rowval(row, year_index)

                if year != target_year and seen_year:
                    # first row after the current year: write it out and move on
                    finish_year()

                    hhwt_dict = defaultdict(int)
                    perwt_dict = defaultdict(int)

                    counties = defaultdict(int)
                    county_dict = defaultdict(int)

                    households = defaultdict(int)
                    persons = defaultdict(int)

                    current_dataset_index += 1
                    if current_dataset_index > len(year_labels) - 1:
                        break
                    year_label = year_labels[current_dataset_index]
                    target_year = int(year_label)
                    label = GEN_LABEL.format(year_label=year_label)
                    seen_year = False

                if year == target_year:
                    seen_year = True

                    # the remaining fields are only parsed for rows that can be counted
                    xvar = get_int_rowval(row, xvar_index)
                    tvar = get_int_rowval(row, tvar_index)

                    # An unparsable t value (None) is rejected explicitly rather than
                    # through Python 2's implicit ordering of None below every number.
                    if xvar in XVARVAL_SET and tvar is not None and TVARVAL_MIN <= tvar <= TVARVAL_MAX:
                        hhwt = get_float_rowval(row, hhwt_index)
                        perwt = get_float_rowval(row, perwt_index)
                        hhwt_dict[hhwt] += 1
                        perwt_dict[perwt] += 1

                        state = get_int_rowval(row, state_index)
                        county_id = get_int_rowval(row, county_index)
                        if use_mcd:
                            mcd = get_int_rowval(row, mcd_index)
                            county = "%d-%d-%d" % (state, county_id, mcd)
                        else:
                            # a county is uniquely identified by the county id and the state id
                            county = "%d-%d" % (state, county_id)
                        counties[county] += 1

                        datanum = get_int_rowval(row, datanum_index)
                        serial = get_int_rowval(row, serial_index)
                        pernum = get_int_rowval(row, pernum_index)

                        household = "%d-%d-%d" % (year, datanum, serial)
                        households[household] += 1

                        person = "%d-%d-%d-%d" % (year, datanum, serial, pernum)
                        persons[person] += 1
                        if persons[person] > 1:
                            # row holds the same split fields the warning has always shown
                            print("%s %s" % ("WARNING: person %s occurs with frequency %d in line:" % (person, persons[person]), row))

                        # add data:
                        if county not in county_dict:
                            county_info = new_county_info()
                        else:
                            county_info = county_dict[county]

                        is_x = xvar == XVARVAL_1
                        is_t = tvar < TVARVAL_1

                        county_info["totalX" if is_x else "totalNotX"] += (perwt * 1.0)
                        county_info["totalT" if is_t else "totalNotT"] += (perwt * 1.0)
                        if is_t:
                            county_info["totalW1" if is_x else "totalW2"] += (perwt * 1.0)

                        county_info["N"] += (perwt * 1.0)

                        county_dict[county] = county_info

            ctr += 1

    # End of file reached while a year was still being accumulated: write it too.
    # After the break above the index is past the last label, so a year that was
    # already written is never written twice.
    if seen_year and current_dataset_index < len(year_labels):
        finish_year()

def process_and_save_data_sets(filepath_with_name, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1_SET, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, use_mcd=False):
    """
    Stream a csv extract and write one spreadsheet of PERWT-weighted totals per
    year, with "T" defined by membership in a set of t values.

    The years in year_labels are worked through one at a time, in the given
    order. Rows of the current year are accumulated per geographic unit; the
    spreadsheet for that year is written as soon as a row with a different YEAR
    follows, or when the end of the file is reached while the year is still
    being accumulated (so the last year no longer needs a trailing row of
    another year to be written). Processing stops after the last label.

    A row is counted only if its x value is in XVARVAL_SET and its t value is in
    TVARVAL_SET. It counts as "X" when x == XVARVAL_1 and as "T" when its t
    value is in TVARVAL_1_SET; W1 collects T rows that are X, W2 collects T rows
    that are not X.

    :param filepath_with_name: input csv; first line is the header, fields are comma separated
    :param output_dir: directory the per-year csv files are written to
    :param year_labels: years to process, as strings parsable by int()
    :param GEN_LABEL: output file name template with a {year_label} placeholder
    :param XVARVAL_1: x value that counts as "X"
    :param XVARVAL_SET: x values a row must have to be counted at all
    :param TVARVAL_1_SET: t values that count as "T"
    :param XVARVAL_LABEL: header name of the x column
    :param TVARVAL_LABEL: header name of the t column
    :param TVARVAL_SET: t values a row must have to be counted at all
    :param use_mcd: if True the geographic unit is state-county-MCD, else state-county
    :return: None; output goes through save_spreadsheet and progress is printed
    """
    current_dataset_index = 0
    year_label = year_labels[current_dataset_index]
    # parsed once per year instead of once per row
    target_year = int(year_label)
    label = GEN_LABEL.format(year_label=year_label)

    ctr = 0

    seen_year = False
    # 'county' here refers to the geographic unit
    counties = defaultdict(int)  # unit id -> number of counted rows
    county_dict = defaultdict(int)  # index: county id; value: new_county_info()

    # number of counted rows per distinct HHWT / PERWT value (printed as diagnostics)
    hhwt_dict = defaultdict(int)
    perwt_dict = defaultdict(int)

    households = defaultdict(int)
    persons = defaultdict(int)

    def finish_year():
        # Write the spreadsheet of the year being accumulated and print its
        # diagnostics. Reads the enclosing function's variables as they are at
        # call time. Each print is a single parenthesised expression so the
        # text matches the Python 2 print statement and also parses on Python 3.
        save_spreadsheet(counties, county_dict, output_dir, label)
        print("%s %s" % ("Finished processing year", year_label))
        print("%s %s" % ("Final ctr", ctr))
        print("%s %s" % ("Number of counties", len(counties)))

        print("%s %s" % ("hhwt_dict:", hhwt_dict))
        print("%s %s" % ("perwt_dict:", perwt_dict))

        print("%s %s" % ("Number of unique households:", len(households)))
        print("%s %s" % ("Number of unique people:", len(persons)))

    with codecs.open(filepath_with_name, encoding="utf-8") as f:
        for line in f:
            if ctr % 5000000 == 0:
                print("Line %d" % ctr)
            row = line.strip().split(",")
            if ctr == 0:  # header
                header = [x.strip('"') for x in row]
                print(header)
                year_index = header.index("YEAR")

                state_index = header.index("STATEICP")
                county_index = header.index("COUNTY")
                if use_mcd:
                    mcd_index = header.index("MCD")

                xvar_index = header.index(XVARVAL_LABEL)
                tvar_index = header.index(TVARVAL_LABEL)

                hhwt_index = header.index("HHWT")
                perwt_index = header.index("PERWT")

                # check that each person is included only once
                # A combination of YEAR, DATANUM, and SERIAL provides a unique identifier for every household in the IPUMS; the combination of YEAR, DATANUM, SERIAL, and PERNUM uniquely identifies every person in the database.
                datanum_index = header.index("DATANUM")
                serial_index = header.index("SERIAL")  # household
                pernum_index = header.index("PERNUM")  # person

            else:  # body
                year = get_int_rowval(row, year_index)

                if year != target_year and seen_year:
                    # first row after the current year: write it out and move on
                    finish_year()

                    hhwt_dict = defaultdict(int)
                    perwt_dict = defaultdict(int)

                    counties = defaultdict(int)
                    county_dict = defaultdict(int)

                    households = defaultdict(int)
                    persons = defaultdict(int)

                    current_dataset_index += 1
                    if current_dataset_index > len(year_labels) - 1:
                        break
                    year_label = year_labels[current_dataset_index]
                    target_year = int(year_label)
                    label = GEN_LABEL.format(year_label=year_label)
                    seen_year = False

                if year == target_year:
                    seen_year = True

                    # the remaining fields are only parsed for rows that can be counted
                    xvar = get_int_rowval(row, xvar_index)
                    tvar = get_int_rowval(row, tvar_index)

                    if xvar in XVARVAL_SET and tvar in TVARVAL_SET:
                        hhwt = get_float_rowval(row, hhwt_index)
                        perwt = get_float_rowval(row, perwt_index)
                        hhwt_dict[hhwt] += 1
                        perwt_dict[perwt] += 1

                        state = get_int_rowval(row, state_index)
                        county_id = get_int_rowval(row, county_index)
                        if use_mcd:
                            mcd = get_int_rowval(row, mcd_index)
                            county = "%d-%d-%d" % (state, county_id, mcd)
                        else:
                            # a county is uniquely identified by the county id and the state id
                            county = "%d-%d" % (state, county_id)
                        counties[county] += 1

                        datanum = get_int_rowval(row, datanum_index)
                        serial = get_int_rowval(row, serial_index)
                        pernum = get_int_rowval(row, pernum_index)

                        household = "%d-%d-%d" % (year, datanum, serial)
                        households[household] += 1

                        person = "%d-%d-%d-%d" % (year, datanum, serial, pernum)
                        persons[person] += 1
                        if persons[person] > 1:
                            # row holds the same split fields the warning has always shown
                            print("%s %s" % ("WARNING: person %s occurs with frequency %d in line:" % (person, persons[person]), row))

                        # add data:
                        if county not in county_dict:
                            county_info = new_county_info()
                        else:
                            county_info = county_dict[county]

                        is_x = xvar == XVARVAL_1
                        is_t = tvar in TVARVAL_1_SET

                        county_info["totalX" if is_x else "totalNotX"] += (perwt * 1.0)
                        county_info["totalT" if is_t else "totalNotT"] += (perwt * 1.0)
                        if is_t:
                            county_info["totalW1" if is_x else "totalW2"] += (perwt * 1.0)

                        county_info["N"] += (perwt * 1.0)

                        county_dict[county] = county_info

            ctr += 1

    # End of file reached while a year was still being accumulated: write it too.
    # After the break above the index is past the last label, so a year that was
    # already written is never written twice.
    if seen_year and current_dataset_index < len(year_labels):
        finish_year()

def main(arguments):

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--run_set', help="run_set")
    parser.add_argument('--input_csv_census_sample_data_file', help="input_csv_census_sample_data_file")
    parser.add_argument('--input_csv_acs_sample_data_file', help="input_csv_acs_sample_data_file")
    parser.add_argument('--output_dir', help="output_dir")

    args = parser.parse_args(arguments)
    run_set = args.run_set
    input_csv_census_sample_data_file = args.input_csv_census_sample_data_file
    input_csv_acs_sample_data_file = args.input_csv_acs_sample_data_file
    output_dir = args.output_dir

    run_set = [int(x) for x in run_set.strip().split(",")]
    print "Note that in the current version, the final year will be cutoff without adding a final line to the .csv"
    print "Considering the following run set:", run_set


    for run_number in run_set:
        print "Currently processing run", run_number
        if run_number == 1:
            year_labels = "1850,1860,1870,1880,1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_menwomen_literate_notliterate__x-is-men_t-is-LIT.csv"

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1 = 4  # Yes, literate (reads and writes)

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "LIT"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [1, 2, 3, 4]

            # only in census data
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_menwomen_literate_notliterate__x-is-men_t-is-LIT.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)

        elif run_number == 2:
            year_labels = "1850,1860,1870,1880,1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_literate_notliterate__x-is-black_t-is-LIT.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1 = 4  # Yes, literate (reads and writes)

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "LIT"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [1, 2, 3, 4]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_blackwhite_literate_notliterate__x-is-black_t-is-LIT.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)


        elif run_number == 3:
            year_labels = "1850,1860,1870,1880,1900,1910,1920,1930,1940,1950,1960,1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_notinlaborforce_inlaborforce__x-is-black_t-is-LABFORCE.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1 = 1  # No, not in the labor force
            TVARVAL_2 = 2  # Yes, in the labor force

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "LABFORCE"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1,TVARVAL_2]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            year_labels = "1850,1860,1870,1880,1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_blackwhite_notinlaborforce_inlaborforce__x-is-black_t-is-LABFORCE.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)

            # ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_blackwhite_notinlaborforce_inlaborforce__x-is-black_t-is-LABFORCE.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

        elif run_number == 4:
            year_labels = "1850,1860,1870,1880,1900,1910,1920,1930,1940,1950,1960,1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_menwomen_notinlaborforce_inlaborforce__x-is-men_t-is-LABFORCE.csv"

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1 = 1  # No, not in the labor force
            TVARVAL_2 = 2  # Yes, in the labor force

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "LABFORCE"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            year_labels = "1850,1860,1870,1880,1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_menwomen_notinlaborforce_inlaborforce__x-is-men_t-is-LABFORCE.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)

            # ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_menwomen_notinlaborforce_inlaborforce__x-is-men_t-is-LABFORCE.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)



        elif run_number == 5:
            year_labels = "1870,1880,1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_foreignborn_nativeborn_literate_notliterate_x-is-foreignborn_t-is-LIT.csv"

            XVARVAL_1 = 5  # Foreign born

            TVARVAL_1 = 4  # Yes, literate (reads and writes)

            XVARVAL_LABEL = "NATIVITY"
            TVARVAL_LABEL = "LIT"

            XVARVAL_SET = [1, 2, 3, 4, 5]
            TVARVAL_SET = [1, 2, 3, 4]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            year_labels = "1870,1880,1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_foreignborn_nativeborn_literate_notliterate_x-is-foreignborn_t-is-LIT.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)


        elif run_number == 6:
            year_labels = "1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_foreignborn_nativeborn_speaksenglish_doesnotspeakenglish_x-is-foreignborn_t-is-SPEAKENG.csv"

            XVARVAL_1 = 5  # Foreign born

            # for these years (1900-1930), only 1,2 are an option
            TVARVAL_1 = 2  # Yes, speaks English...
            TVARVAL_2 = 1  # Does not speak English

            XVARVAL_LABEL = "NATIVITY"
            TVARVAL_LABEL = "SPEAKENG"

            XVARVAL_SET = [1, 2, 3, 4, 5]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


            # also by mcd:
            year_labels = "1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_foreignborn_nativeborn_speaksenglish_doesnotspeakenglish_x-is-foreignborn_t-is-SPEAKENG.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)




        elif run_number == 7:
            year_labels = "1870,1880,1900,1910,1920,1930,1960".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_foreignborn_nativeborn__x-is-black_t-is-NATIVITY.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1 = 5  # Foreign born

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "NATIVITY"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [1, 2, 3, 4, 5]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            year_labels = "1870,1880,1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_blackwhite_foreignborn_nativeborn__x-is-black_t-is-NATIVITY.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)

        elif run_number == 8:
            year_labels = "1870,1880,1900,1910,1920,1930,1960".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_menwomen_foreignborn_nativeborn__x-is-men_t-is-NATIVITY.csv"

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1 = 5  # Foreign born

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "NATIVITY"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [1, 2, 3, 4, 5]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            year_labels = "1870,1880,1900,1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_foreignborn_nativeborn__x-is-men_t-is-NATIVITY.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)


        elif run_number == 9:
            year_labels = "1910,1920,1930,1940,1950,1960,1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_menwomen_selfemployed_worksforwages__x-is-men_t-is-CLASSWKR.csv"

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1 = 1  # Self-employed
            TVARVAL_2 = 2  # Works for wages

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "CLASSWKR"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            year_labels = "1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_menwomen_selfemployed_worksforwages__x-is-men_t-is-CLASSWKR.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)

            # ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_menwomen_selfemployed_worksforwages__x-is-men_t-is-CLASSWKR.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

        elif run_number == 10:
            year_labels = "1910,1920,1930,1950,1960,1980,1990,2000".split(",")
            #year_labels = "1950,1960,1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_selfemployed_worksforwages__x-is-black_t-is-CLASSWKR.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1 = 1  # Self-employed
            TVARVAL_2 = 2  # Works for wages

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "CLASSWKR"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # also by mcd:
            year_labels = "1910,1920,1930".split(",")
            GEN_LABEL = "ipums_sample_census_by_mcd_year{year_label}_blackwhite_selfemployed_worksforwages__x-is-black_t-is-CLASSWKR.csv"
            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET, True)

            # ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_blackwhite_selfemployed_worksforwages__x-is-black_t-is-CLASSWKR.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


        elif run_number == 11:
            year_labels = "1950,1960,1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_vet_notvet__x-is-black_t-is-vetstat.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1 = 2  # VET
            TVARVAL_2 = 1  # NOT_VET

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "VETSTAT"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                   TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_blackwhite_vet_notvet__x-is-black_t-is-vetstat.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


        elif run_number == 12:
            year_labels = "1930,1950,1960,1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_unemployed_employed__x-is-black_t-is-EMPSTAT.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1 = 2  # Unemployed
            TVARVAL_2 = 1  # Employed

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "EMPSTAT"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # by ACS
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_blackwhite_unemployed_employed__x-is-black_t-is-EMPSTAT.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)



        elif run_number == 13:
            year_labels = "1930,1950,1960,1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_menwomen_unemployed_employed__x-is-men_t-is-EMPSTAT.csv"

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1 = 2  # Unemployed
            TVARVAL_2 = 1  # Employed

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "EMPSTAT"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]

            process_and_save_data(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

            # by ACS
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_menwomen_unemployed_employed__x-is-men_t-is-EMPSTAT.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)



        elif run_number == 14:
            year_labels = "1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_foreignbornnoncitizen_foreignborncitizen_doesnotspeakenglishverywell_speaksverywell__x-is-foreignbornnoncitizen_t-is-SPEAKENG.csv"

            XVARVAL_1 = 3  # Not a citizen
            #XVARVAL_2 = [1, 2]  # Born abroad of American parents || Naturalized citizen

            TVARVAL_1_SET = [1, 5, 6]  # Does not speak English || Yes, speaks well || Yes, but not well
            #TVARVAL_2_SET = [3, 4]  # Yes, speaks only English || Yes, speaks very well

            XVARVAL_LABEL = "CITIZEN"
            TVARVAL_LABEL = "SPEAKENG"

            XVARVAL_SET = [1, 2, 3]
            TVARVAL_SET = [1, 3, 4, 5, 6]


            process_and_save_data_sets(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


            # by ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_foreignbornnoncitizen_foreignborncitizen_doesnotspeakenglishverywell_speaksverywell__x-is-foreignbornnoncitizen_t-is-SPEAKENG.csv"
            process_and_save_data_sets(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

        elif run_number == 15:
            year_labels = "1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_menwomen_speaksenglishverywell_doesnotspeakenglishverywell_x-is-men_t-is-SPEAKENG.csv"

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1_SET = [3, 4]  # Yes, speaks only English || Yes, speaks very well
            #TVARVAL_2_SET = [1, 5, 6]  # Does not speak English || Yes, speaks well || Yes, but not well

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "SPEAKENG"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [1, 3, 4, 5, 6]

            process_and_save_data_sets(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


            # by ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_menwomen_speaksenglishverywell_doesnotspeakenglishverywell_x-is-men_t-is-SPEAKENG.csv"
            process_and_save_data_sets(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


        elif run_number == 16:
            year_labels = "1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_speaksenglishverywell_doesnotspeakenglishverywell_x-is-black_t-is-SPEAKENG.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1_SET = [3, 4]  # Yes, speaks only English || Yes, speaks very well
            #TVARVAL_2_SET = [1, 5, 6]  # Does not speak English || Yes, speaks well || Yes, but not well

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "SPEAKENG"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [1, 3, 4, 5, 6]

            process_and_save_data_sets(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


            # by ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_blackwhite_speaksenglishverywell_doesnotspeakenglishverywell_x-is-black_t-is-SPEAKENG.csv"
            process_and_save_data_sets(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)



        elif run_number == 17:

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1 = 1  # No health insurance coverage
            TVARVAL_2 = 2  # With health insurance coverage

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "HCOVANY"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]


            # only ACS:
            year_labels = "2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_menwomen_nohealthinsurance_healthinsurance__x-is-men_t-is-HCOVANY.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

        elif run_number == 18:

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1 = 1  # No health insurance coverage
            TVARVAL_2 = 2  # With health insurance coverage

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "HCOVANY"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [TVARVAL_1, TVARVAL_2]


            # only ACS:
            year_labels = "2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_blackwhite_nohealthinsurance_healthinsurance__x-is-black_t-is-HCOVANY.csv"
            process_and_save_data(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

        elif run_number == 19:

            year_labels = "1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_foreignborncitizen_foreignbornnoncitizen_x-is-black_t-is-CITIZEN.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1_SET = [1, 2]  # Born abroad of American parents || Naturalized citizen
            #TVARVAL_2_SET = [3]  # Not a citizen

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "CITIZEN"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [1, 2, 3]

            process_and_save_data_sets(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


            # by ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_blackwhite_foreignborncitizen_foreignbornnoncitizen_x-is-black_t-is-CITIZEN.csv"
            process_and_save_data_sets(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

        elif run_number == 20:

            year_labels = "1980,1990,2000".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_menwomen_foreignborncitizen_foreignbornnoncitizen_x-is-men_t-is-CITIZEN.csv"

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1_SET = [1, 2]  # Born abroad of American parents || Naturalized citizen
            #TVARVAL_2_SET = [3]  # Not a citizen

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "CITIZEN"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]
            TVARVAL_SET = [1, 2, 3]


            process_and_save_data_sets(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)


            # by ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_menwomen_foreignborncitizen_foreignbornnoncitizen_x-is-men_t-is-CITIZEN.csv"
            process_and_save_data_sets(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET, TVARVAL_1_SET,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_SET)

        elif run_number == 21:
            year_labels = "1950,1960,1980,1990".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_menwomen_belowpovertythreshold_povertythreshold100orhigher__x-is-black_t-is-POVERTY.csv"

            XVARVAL_1 = 1  # Male
            XVARVAL_2 = 2  # Female

            TVARVAL_1 = 100

            XVARVAL_LABEL = "SEX"
            TVARVAL_LABEL = "POVERTY"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]

            # POVERTY Specific Variable Codes
            # 000 = N/A
            # 001 = 1 percent or less of poverty threshold
            # 501 = 501 percent or more of poverty threshold


            TVARVAL_MIN = 001
            TVARVAL_MAX = 501

            process_and_save_data_ranges(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_MIN, TVARVAL_MAX)


            # ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_menwomen_belowpovertythreshold_povertythreshold100orhigher__x-is-black_t-is-POVERTY.csv"
            process_and_save_data_ranges(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_MIN, TVARVAL_MAX)

        elif run_number == 22:
            year_labels = "1950,1960,1980,1990".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_blackwhite_belowpovertythreshold_povertythreshold100orhigher__x-is-black_t-is-POVERTY.csv"

            XVARVAL_1 = 2  # BLACK
            XVARVAL_2 = 1  # WHITE

            TVARVAL_1 = 100

            XVARVAL_LABEL = "RACE"
            TVARVAL_LABEL = "POVERTY"

            XVARVAL_SET = [XVARVAL_1, XVARVAL_2]

            # POVERTY Specific Variable Codes
            # 000 = N/A
            # 001 = 1 percent or less of poverty threshold
            # 501 = 501 percent or more of poverty threshold


            TVARVAL_MIN = 001
            TVARVAL_MAX = 501

            process_and_save_data_ranges(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_MIN, TVARVAL_MAX)


            # ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_blackwhite_belowpovertythreshold_povertythreshold100orhigher__x-is-black_t-is-POVERTY.csv"
            process_and_save_data_ranges(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_MIN, TVARVAL_MAX)

        elif run_number == 23:
            year_labels = "1980,1990".split(",")
            GEN_LABEL = "ipums_sample_census_year{year_label}_foreignbornnoncitizen_foreignborncitizen_belowpovertythreshold_povertythreshold100orhigher__x-is-foreignbornnoncitizen_t-is-POVERTY.csv"

            XVARVAL_1 = 3  # Not a citizen
            XVARVAL_2 = [1, 2]  # Born abroad of American parents || Naturalized citizen

            TVARVAL_1 = 100

            XVARVAL_LABEL = "CITIZEN"
            TVARVAL_LABEL = "POVERTY"

            XVARVAL_SET = [1, 2, 3]

            # POVERTY Specific Variable Codes
            # 000 = N/A
            # 001 = 1 percent or less of poverty threshold
            # 501 = 501 percent or more of poverty threshold


            TVARVAL_MIN = 001
            TVARVAL_MAX = 501

            process_and_save_data_ranges(input_csv_census_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1, XVARVAL_SET,
                                  TVARVAL_1, XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_MIN, TVARVAL_MAX)


            # ACS:
            year_labels = "2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016".split(",")
            GEN_LABEL = "ipums_acs_census_year{year_label}_foreignbornnoncitizen_foreignborncitizen_belowpovertythreshold_povertythreshold100orhigher__x-is-foreignbornnoncitizen_t-is-POVERTY.csv"
            process_and_save_data_ranges(input_csv_acs_sample_data_file, output_dir, year_labels, GEN_LABEL, XVARVAL_1,
                                  XVARVAL_SET,
                                  TVARVAL_1,
                                  XVARVAL_LABEL, TVARVAL_LABEL, TVARVAL_MIN, TVARVAL_MAX)

if __name__ == "__main__":
    # Script entry point: hand main() every command-line argument after the
    # program name, then pass main()'s return value to sys.exit().
    cli_args = sys.argv[1:]
    sys.exit(main(cli_args))



